# Usage listing: `parameters`, `population`, `year_first` and `year_last` have
# no default here; every other argument is optional.
propop::propop_tables(
  parameters,
  population,
  year_first,
  year_last,
  scenarios = NULL,
  age_groups = 101,
  fert_first = 16,
  fert_last = 50,
  share_born_female = 100 / 205,
  # "net" for net migration; "rate" for emi- and immigration rates
  subregional = NULL,
  binational = TRUE,
  spatial_unit = "spatial_unit"
)
WIP: New function propop::propop_tables()
Run projections with tibbles, purrr and dplyr
Introduction
propop::propop_tables() is a dplyr-based version of propop::propop(), which works with matrices.
✅ modular structure
✅ uses dplyr
✅ more flexibility for adding/removing columns and new modules
🟨 for now, both functions will be kept and maintained -> to be discussed
Overview: propop_tables()
Function arguments
The new function’s arguments are identical to propop::propop():
(only the first four arguments, shown in blue, are mandatory: parameters, population, year_first and year_last)
Structure
Excerpt from propop_tables()
# [...]
# Run projection ----
# iterate across spatial units, scenarios and years:
# fold `project_population()` over `list_parameters`, starting from
# `init_population`; each step receives the accumulated result as `population`
# and the next list element as `parameters`
df_result <- purrr::reduce(
  .x = list_parameters,
  .f = \(population, parameters) project_population(
    population, parameters,
    subregional = subregional
  ),
  .init = init_population
)
# [...]
Performance for one region (Canton of Aargau)
We use STAT-TAB data as described in the vignette for projecting a single region:
# Run propop with tables (new feature)
# Timing of a single-region projection for 2024-2055 with the table-based
# implementation; the recorded console output is kept below as `#>` comments.
system.time({
  result_tables <- propop::propop_tables(
    parameters = fso_parameters,
    year_first = 2024,
    year_last = 2055,
    population = fso_population,
    binational = TRUE
  )
})
#>    user  system elapsed
#>  22.303   0.045  22.440
# Run propop with matrices (original)
# Same inputs as the table-based run, timed with the matrix-based
# implementation; the recorded console output is kept below as `#>` comments.
system.time({
  result_matrices <- propop::propop(
    parameters = fso_parameters,
    year_first = 2024,
    year_last = 2055,
    population = fso_population,
    binational = TRUE
  )
})
#>    user  system elapsed
#>   5.760   0.040   5.839
Comparison between FSO-results, propop::propop_tables() and propop::propop()
Performance for five subregions
Show/hide example code
# FSO parameters for fictitious subregions
fso_parameters_sub <- fso_parameters |>
  # repeat every row five times, once per subregion
  tidyr::uncount(5) |>
  # label the five copies of each row as subregions "1" to "5"
  dplyr::mutate(
    spatial_unit = as.character(rep(1:5, times = nrow(fso_parameters)))
  )
# Split the original population into five shares ("cuts"): four are drawn at
# random, the fifth is the remainder.
# Every draw is seeded separately so each cut is reproducible on its own;
# the range 0.1 to 0.5 avoids extreme values.
draw_cut <- function(seed) {
  set.seed(seed)
  round(runif(1, min = 0.1, max = 0.5), digits = 2)
}
cut_1 <- draw_cut(1)
cut_2 <- draw_cut(2)
cut_3 <- draw_cut(3)
cut_4 <- draw_cut(4)
# the last cut takes whatever is left, so everything adds up to 100%
cut_5 <- 1 - cut_1 - cut_2 - cut_3 - cut_4
# Generate population data for five subregions
df_population_sub <- fso_population |>
  # repeat every row five times, once per subregion
  tidyr::uncount(5) |>
  dplyr::mutate(
    # label the five copies of each row as subregions "1" to "5"
    spatial_unit = as.character(rep(1:5, times = nrow(fso_population))),
    # Distribute the original population according to the "cuts": the unit's
    # number picks its cut; any unit outside 1-5 would end up as NA
    n = round(
      n * c(cut_1, cut_2, cut_3, cut_4, cut_5)[as.integer(spatial_unit)]
    )
  )
# Prepare subregional migration
parameters_sub_mig <- fso_parameters_sub |>
  # Create fictitious migration parameters
  dplyr::mutate(
    # Four regions with emigration, 1 region with immigration:
    # units 1 to 4 each get one seeded random draw (a single value recycled
    # over all rows of that unit); unit 5 stays NA until the next step
    mig_sub = dplyr::case_when(
      spatial_unit == "1" ~ {
        set.seed(1)
        round(rnorm(1, mean = 0, sd = 0.2), digits = 4)
      },
      spatial_unit == "2" ~ {
        set.seed(2)
        round(rnorm(1, mean = 0, sd = 0.2), digits = 4)
      },
      spatial_unit == "3" ~ {
        set.seed(25)
        round(rnorm(1, mean = 0, sd = 0.2), digits = 4)
      },
      spatial_unit == "4" ~ {
        set.seed(12)
        round(rnorm(1, mean = 0, sd = 0.2), digits = 4)
      },
      .default = NA
    )
  ) |>
  dplyr::mutate(
    # unit 5 offsets the sum of the other four within each demographic group
    mig_sub = dplyr::case_when(
      spatial_unit == "5" ~ 0 - sum(mig_sub, na.rm = TRUE),
      .default = mig_sub
    ),
    # per-group sum as a sanity check; not part of the selected columns below
    check = sum(mig_sub, na.rm = TRUE),
    .by = c(nat, sex, age, year, scen)
  ) |>
  dplyr::select(
    nat, sex, age, year, scen, spatial_unit,
    birthrate, int_mothers, mor,
    emi_int, emi_nat, imm_int_n, imm_nat_n,
    acq, emi_nat_n, mig_nat_n, mig_sub
  )
# Calculate shares
# Share of each spatial unit in the total of its demographic group
# (nat x sex x age). Groups whose total is zero get a share of 0 instead of
# the NaN that 0 / 0 would produce.
df_population_shares <- df_population_sub |>
  dplyr::mutate(sum_n = sum(n), .by = c(nat, sex, age)) |>
  dplyr::mutate(
    # namespaced like every other dplyr call here, so it also works when
    # dplyr is not attached
    share = dplyr::case_when(
      sum_n == 0 ~ 0,
      .default = n / sum_n
    )
  )
# Apply shares
parameters_sub_size <- parameters_sub_mig |>
  # attach each unit's population share to its parameter rows
  dplyr::left_join(
    dplyr::select(
      df_population_shares,
      "spatial_unit", "nat", "sex", "age", "share"
    ),
    by = c("spatial_unit", "nat", "sex", "age")
  ) |>
  dplyr::mutate(
    # Calculate number of incoming people per demographic group and spatial unit
    imm_int_n_distr = imm_int_n * share,
    imm_nat_n_distr = imm_nat_n * share,
    # overwrite the original totals; the *_distr helper columns are kept
    imm_int_n = imm_int_n_distr,
    imm_nat_n = imm_nat_n_distr
  )
# Run propop with tables
# Timing of the five-subregion projection with the table-based implementation;
# the recorded console output is kept below as `#>` comments.
system.time({
  result_tables_sub <- propop::propop_tables(
    parameters = parameters_sub_size,
    year_first = 2024,
    year_last = 2055,
    population = df_population_sub,
    subregional = "net",
    binational = TRUE
  )
})
#>    user  system elapsed
#>  55.507   0.084  55.772
# Run propop with matrices
# Same inputs as the table-based subregional run, timed with the matrix-based
# implementation; the recorded console output is kept below as `#>` comments.
system.time({
  result_matrices_sub <- propop::propop(
    parameters = parameters_sub_size,
    year_first = 2024,
    year_last = 2055,
    population = df_population_sub,
    subregional = TRUE,
    binational = TRUE
  )
})
#>    user  system elapsed
#>  19.900   0.040  20.029
Next steps
Fine tuning for rates (esp. people aged 80 years and older)
☒ No bugs were found. Results for population components in the output still differ a bit from the original (propop()), but only for components that are based on rates.
☒ Comparison with FSO-results
Subregions
☒ propop_tables() runs for subregions
☒ Two methods enabled for the subregional distribution of migration (net numbers or rates)
☐ Evaluate computation speed and accuracy (tests with real data)
Code cleaning, function-feedback, package tests
☒ Code is more or less clean
☒ Function feedback is there
☐ Package tests (mostly completed; only the cross-check tests between the matrix and table versions are missing)
☐ Documentation (e.g. a new vignette)
Integration into the main branch: should propop_tables() remain a separate function, or become an option selected via an argument of propop()?